TOS Silver 2000

home *** CD-ROM | disk | FTP | other *** search

/ TOS Silver 2000 / TOS Silver 2000.iso / Anwendun / a2html / a2html-0.81 / source / a2html.c next >

Wrap

C/C++ Source or Header | 1999-08-28 | 51.4 KB | 1,508 lines

/* * $Id: a2html.c,v 1.8 1999/08/28 19:20:35 tommya Exp $ * * Ascii to html * * Search "show_help" for usage. * Tab not used. * * THIS PROGRAM IS PUBLIC DOMAIN (NO COPYRIGHT) * * see changes list of changes. * * Created by Tommy Andersen 1997,98,99 * Compiler: Lattice C (Atari) + GCC (Atari or Linux) */ #ifdef ATARI #include <mintbind.h> #include <osbind.h> #endif #include <strings.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> #ifdef ATARI #define MC " Atari" #else #define MC " Linux" #endif #ifdef __MINT__ #define LV " Mintlib" #else #define LV #endif #define VERSION "0.81" MC LV #define WORD_LENGTH 1000 #define HTML_EXTENSION ".html" #define RC_LINE_BUFFER 1000 #define MAX_VAR_LENGTH 50 /* Control these <br> a2html invents */ #define DISALLOW_BR 0 #define ALLOW_BR 1 #define FORCE_BR 2 #if defined(LATTICE) && !defined(__LATTICE__) /* Not using mintlib with Lattice? */ #define strncasecmp strnicmp #endif const unsigned char header_doctype[] = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 3.2 Final//EN\">\n"; const unsigned char header_tags1[] = "<html>\n" \ "<head>\n" \ "<title>"; const unsigned char header_tags2[] = "</title>\n" \ "</head>\n" \ "\n"; const unsigned char pre_start_tag[] = "<pre>\n"; const unsigned char pre_end_tag[] = "</pre>\n"; const unsigned char bottom_tags[] = "</body>\n" \ "</html>\n"; const unsigned char * const url_types[] = { "ftp", "http", "file", "news", "wais", "nntp", "imap", "https", \ "telnet", "mailto", "gopher", "prospero", NULL }; /* If input file is a email, these header lines are displayed */ const unsigned char * const show_emh_field[] = { "date", "from", "reply-to", "to", "cc", "subject", NULL }; char *yes_strings[] = { "1", "yes", "true", "sure", NULL }; char *no_strings[] = { "0", "no", "false", "noway", NULL }; /* * Atari charset to iso8859-1 charset * Just a simple lookup array * The Atari char is used as an index, the lookup value is the iso8859-1 char. */ const unsigned char convertAtariToIso88591[]= { /* 0-9 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /* 10-19 */ 10, ' ', ' ', 13, ' ', ' ', ' ', ' ', ' ', ' ', /* 20-29 */ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /* 30-39 */ ' ', ' ', ' ', '!', 34, '#', '$', '%', '&', 39, /* 40-49 */ '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', /* 50-59 */ '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', /* 60-69 */ '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', /* 70-79 */ 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 80-89 */ 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', /* 90-99 */ 'Z', '[', 92 , ']', '^', '_', '`', 'a', 'b', 'c', /*100-109*/ 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', /*110-119*/ 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', /*120-129*/ 'x', 'y', 'z', '{', '|', '}', '~', ' ', 199, 252, /*130-139*/ 233, 226, 228, 224, 229, 231, 234, 235, 232, 239, /*140-149*/ 238, 236, 196, 197, 201, 230, 198, 244, 246, 242, /*150-159*/ 251, 249, 255, 214, 220, 162, 163, 165, 223, ' ', /*160-169*/ 225, 237, 243, 250, 241, 209, ' ', ' ', 191, ' ', /*170-179*/ 172, 189, 188, 161, 171, 187, 227, 245, 216, 248, /*180-189*/ 230, 198, 192, 195, 213, 168, 180, ' ', 182, 169, /*190-199*/ 174, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /*200-209*/ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /*210-219*/ ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /*220-229*/ ' ', 167, ' ', ' ', ' ', 223, ' ', ' ', ' ', ' ', /*230-239*/ 181, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', /*240-249*/ ' ', 177, ' ', ' ', ' ', ' ', 247, ' ', 176, ' ', /*250-255*/ 183, ' ', ' ', 178, 179, 175 }; /* 8 bit iso-8859-1 to 7bit html */ /* lookup string table */ /* " = " = */ const unsigned char * const convertIso88591ToHtml[]= { /* 0-9 */ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", /* 10-19 */ "\n", " ", " ", NULL, " ", " ", " ", " ", " ", " ", /* 20-29 */ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", /* 30-39 */ " ", " ", " ", "!", ""","#", "$", "%", "&", "'", /* 40-49 */ "(", ")", "*", "+", ",", "-", ".", "/", "0", "1", /* 50-59 */ "2", "3", "4", "5", "6", "7", "8", "9", ":", ";", /* 60-69 */ "<", "=", ">", "?", "@", "A", "B", "C", "D", "E", /* 70-79 */ "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", /* 80-89 */ "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", /* 90-99 */ "Z", "[", "\\","]", "^", "_", "`", "a", "b", "c", /*100-109*/ "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", /*110-119*/ "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", /*120-129*/ "x", "y", "z", "{", "|", "}", "~", " ", " ", " ", /* 128-159 not used */ /*130-139*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", /*140-149*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", /*150-159*/ " ", " ", " ", " ", " ", " ", " ", " ", " ", " ", /*160-169*/ " ","¡","¢","£","¤","¥","¦","§","¨","©", /*170-179*/ "ª","«","¬","","®","¯","°","±","²","³", /*180-189*/ "´","µ","¶","·","¸","¹","º","»","¼","½", /*190-199*/ "¾","¿","À","Á","Â","Ã","Ä","Å","Æ","Ç", /*200-209*/ "È","É","Ê","Ë","Ì","Í","Î","Ï","Ð","Ñ", /*210-219*/ "Ò","Ó","Ô","Õ","Ö","×","Ø","Ù","Ú","Û", /*220-229*/ "Ü","Ý","Þ","ß","à","á","â","ã","ä","å", /*230-239*/ "æ", "ç","è","é","ê","ë","ì","í","î","ï", /*240-249*/ "ð","ñ","ò","ó","ô","õ","ö","÷","ø","ù", /*250-255*/ "ú","û","ü","ý","þ", "ÿ" }; struct file_entry { char *input_filename; /* file to convert */ char *output_filename; /* where to put the output */ unsigned char *title_html; struct file_entry *next; } *work_list = NULL; struct emh_text_entry { struct emh_text_entry *next; unsigned char *text; }; /* * Prototypes */ int main (int argc, char *[]); void output_error_text (char *); int make_html_file (char *, char*, const unsigned char*); void make_output_filename (const char *, char *, const char*); struct file_entry *create_entry (void); void free_list (struct file_entry *); void show_help (int); void output_char (const int, int, FILE *); void output_string (const int, const char *, FILE *); void output_parse_char (const int, unsigned int, FILE *); void output_parse_string (const int, const char *, FILE *); void output_word (const int, const unsigned char *word, FILE *handle); char *find_filename (char *); void read_rc_file (char *); void set_var (char *variable, char *value); FILE *fopen_rc_file (const char *rc_name, const char *mode); /* * Some default settings */ #define ATARICHARSET 0 #define ISOCHARSET 1 #define TRUE 1 #define FALSE 0 #define NA -1 #ifdef ATARI int method = ATARICHARSET; /* input files uses the Atari charset */ #else int method = ISOCHARSET; /* input files uses the ISO-8859-1 charset */ #endif int quiet = FALSE; /* print errors to stderr */ int pause_required = FALSE; /* don't wait for a keypress */ int crlf = FALSE; /* use un*x lines */ int notags = FALSE; /* use tags in output */ int nopre = FALSE; /* use <pre> tags */ int nolinks = FALSE; /* check for links */ int nomail = FALSE; /* check for emailadress */ int nobr = FALSE; /* use <br> if <nopre> is true */ int doctype = TRUE; /* output html doctype */ int emailsource = FALSE; /* Input file is a normal ascii file */ unsigned char *footer_html = NULL; /* footer text - raw html code */ unsigned char *separator_html = NULL; /* Emailer separator - raw html code */ char *bodybgcolor = NULL; char *bodytext = NULL; char *bodylink = NULL; #define ARGUMENT_IS(x) (!strcmp ((x), argv[a])) #undef max #define max(x,y) ((x) > (y) ? (x) : (y)) int main (int argc, char *argv[]) { int a = 1; int ret = 0; int save_argc = argc; struct file_entry *current, *prev; #ifdef ATARI #if defined(LATTICE) && !defined(__LATTICE__) Pdomain (1); /* mint domain, gcc (MiNT) doesn't need it */ fmode (stdout,1); /* binary output -- no translation VERY IMPORTANT! */ #else stdout->_flag |= _IOBIN; /* force binary output (mintlib) */ #endif #endif read_rc_file ("a2htmlrc"); current = work_list = create_entry (); if (!current) exit (1); /* low memory! Shouldn't be possible */ /* parse command line and build a list of files */ while (--argc > 0) { if (ARGUMENT_IS ("-iso") || ARGUMENT_IS("-iso-8859-1")) method = ISOCHARSET; else if (ARGUMENT_IS ("-atari")) method = ATARICHARSET; else if (ARGUMENT_IS ("-q") || ARGUMENT_IS ("-quiet")) quiet = TRUE; else if (ARGUMENT_IS ("-p") || ARGUMENT_IS ("-pause")) pause_required = TRUE; else if (ARGUMENT_IS ("-crlf")) crlf = TRUE; else if (ARGUMENT_IS ("-notags")) notags = TRUE; else if (ARGUMENT_IS ("-nolinks")) nolinks = TRUE; else if (ARGUMENT_IS ("-nomail")) nomail = TRUE; else if (ARGUMENT_IS ("-nopre")) nopre = TRUE; else if (ARGUMENT_IS ("-nobr")) nobr = TRUE; else if (ARGUMENT_IS ("-nodoctype")) doctype = FALSE; else if (ARGUMENT_IS ("-emailsource")) { emailsource = TRUE; method = ISOCHARSET; /* forced */ } else if (ARGUMENT_IS ("--help")) { show_help (TRUE); if (pause_required) { output_error_text ("\nPress Return\n"); getchar (); } exit (1); } else if (ARGUMENT_IS ("--version")) { show_help (FALSE); if (pause_required) { output_error_text ("\nPress Return\n"); getchar (); } exit (1); } else if (ARGUMENT_IS ("-dumpargs")) { int a=0; char *some_text = (char*) malloc (2000); while (save_argc-- && some_text) { sprintf (some_text, "Arg no. %d is (%s)\n", a, argv[a] ? argv[a] : "nothing"); output_error_text (some_text); a++; } if (some_text) free (some_text); } else if (ARGUMENT_IS ("-footer")) /* footer text specified */ { a++; if (--argc > 0) /* must have an extra argument */ { if (footer_html) free (footer_html); footer_html = (char*) strdup (argv[a]); } } else if (ARGUMENT_IS ("-separator")) /* separator text specified */ { a++; if (--argc > 0) /* must have an extra argument */ { if (separator_html) free (separator_html); separator_html = (char*) strdup (argv[a]); } } else if (ARGUMENT_IS ("-bodybgcolor")) /* body background color specified */ { a++; if (--argc > 0) { if (bodybgcolor) free (bodybgcolor); bodybgcolor = (char*) strdup (argv[a]); } } else if (ARGUMENT_IS ("-bodytext")) /* body text color specified */ { a++; if (--argc > 0) { if (bodytext) free (bodytext); bodytext = (char*) strdup (argv[a]); } } else if (ARGUMENT_IS ("-bodylink")) /* body link color specified */ { a++; if (--argc > 0) { if (bodylink) free (bodylink); bodylink = (char*) strdup (argv[a]); } } else if (ARGUMENT_IS ("-") || ARGUMENT_IS ("-stdout")) /* stdout output */ { if (current && current->output_filename) { free (current->output_filename); current->output_filename = NULL; } } else if (ARGUMENT_IS ("-o") || ARGUMENT_IS ("-out")) /* output filename specified */ { a++; if (--argc > 0) /* must have an extra argument */ { if (current) { if (current->output_filename) free (current->output_filename); current->output_filename = (char*) strdup (argv[a]); } } } else if (ARGUMENT_IS ("-title")) /* new title specified */ { a++; if (--argc > 0) /* must have an extra argument */ { if (current) { if (current->title_html) free (current->title_html); current->title_html = (unsigned char*) strdup (argv[a]); } } } else { /* Well, only files (and illegal switches) comes here */ if (current && current->input_filename) { /* this entry was allready used, so make a new one */ prev = current; current = create_entry (); prev->next = current; } if (current) { current->input_filename = (char*) strdup (argv[a]); /* copy that name */ /* make some space for the output filename */ current->output_filename = (char*) calloc (strlen (current->input_filename) + sizeof (HTML_EXTENSION) + 1, sizeof (char)); /* calculate the outputname */ make_output_filename (current->input_filename, current->output_filename, HTML_EXTENSION); } } a++; } /* while */ #ifdef DEBUG current = work_list; while (current) { fprintf (stderr, "Debug: Filelist = %s %s\n", (current->input_filename) ? current->input_filename : "null", (current->output_filename) ? current->output_filename : "null" ); current = current->next; } #endif /* run through the list and create the files */ current = work_list; while (current) { ret += make_html_file (current->input_filename, current->output_filename, current->title_html); current = current->next; } free_list (work_list); if (footer_html) free (footer_html); if (separator_html) free (separator_html); if (bodybgcolor) free (bodybgcolor); if (bodytext) free (bodytext); if (bodylink) free (bodylink); if (pause_required) { output_error_text ("\nPress Return\n"); getchar (); } return (ret > 0) ? 1 : 0; } /* * Parse it and set variables */ void read_rc_file (char *rc_name) { FILE *fp_rc = fopen_rc_file (rc_name, "r"); if (fp_rc) { char *buffer = (char*) malloc (RC_LINE_BUFFER+1); char *var = (char*) malloc (MAX_VAR_LENGTH+1); char *val = (char*) malloc (RC_LINE_BUFFER+1); if (buffer && var && val) { /* a rc line looks like "variable = value\n" */ while (fgets (buffer, RC_LINE_BUFFER, fp_rc)) { if (buffer[0] != '#') { char *p; /* pointer to work with */ char *p_equal = NULL; /* will later pointer at = */ /* remove trailing spaces and nasty codes from end */ p = buffer + strlen (buffer) - 1; while ((p >= buffer) && (*p <= ' ')) *p-- = '\0'; if (buffer[0]) /* still something left? */ { if ((p_equal = strchr (buffer, '=')) > buffer) { /* find variable name */ *p_equal = '\0'; /* split buffer */ p = buffer + strlen (buffer) - 1; /* remove spaces after variable */ while ((p >= buffer) && (*p <= ' ')) *p-- = '\0'; p = buffer; /* remove spaces before variable */ while (*p && (*p <= ' ')) p++; strncpy (var, p, MAX_VAR_LENGTH); var[MAX_VAR_LENGTH] = '\0'; /* allocated +1, so it's okay */ /* get the value */ p = p_equal + 1; while (*p && (*p <= ' ')) /* skip spaces before value */ p++; strcpy (val, p); if (var[0]) /* allow empty values, but variable must be defined */ set_var (var, val); } } } } } if (buffer) free (buffer); if (var) free (var); if (val) free (val); fclose (fp_rc); } } void set_var (char *variable, char *value) { register int bool_val; if (variable && value) { bool_val = NA; if (value[0]) { char **p = yes_strings; while (*p) { if (!strcmp (value, *p)) bool_val = TRUE; p++; } p = no_strings; while (*p) { if (!strcmp (value, *p)) bool_val = FALSE; p++; } } /* some trivial tests going on here... */ if (!strcmp ("iso", variable) || !strcmp ("iso-8859-1", variable)) { if (bool_val == TRUE) method = ISOCHARSET; else if (bool_val == FALSE) method = ATARICHARSET; } if (!strcmp ("atari", variable)) { if (bool_val == TRUE) method = ATARICHARSET; else if (bool_val == FALSE) method = ISOCHARSET; } if (!strcmp ("quiet", variable)) { if (bool_val == TRUE) quiet = TRUE; else if (bool_val == FALSE) quiet = FALSE; } if (!strcmp ("pause", variable)) { if (bool_val == TRUE) pause_required = TRUE; else if (bool_val == FALSE) pause_required = FALSE; } if (!strcmp ("crlf", variable)) { if (bool_val == TRUE) crlf = TRUE; else if (bool_val == FALSE) crlf = FALSE; } if (!strcmp ("notags", variable)) { if (bool_val == TRUE) notags = TRUE; else if (bool_val == FALSE) notags = FALSE; } if (!strcmp ("nolinks", variable)) { if (bool_val == TRUE) nolinks = TRUE; else if (bool_val == FALSE) nolinks = FALSE; } if (!strcmp ("nomail", variable)) { if (bool_val == TRUE) nomail = TRUE; else if (bool_val == FALSE) nomail = FALSE; } if (!strcmp ("nopre", variable)) { if (bool_val == TRUE) nopre = TRUE; else if (bool_val == FALSE) nopre = FALSE; } if (!strcmp ("nobr", variable)) { if (bool_val == TRUE) nobr = TRUE; else if (bool_val == FALSE) nobr = FALSE; } if (!strcmp ("nodoctype", variable)) { if (bool_val == TRUE) doctype = FALSE; /* pitfall */ else if (bool_val == FALSE) doctype = TRUE; } if (!strcmp ("emailsource", variable)) { if (bool_val == TRUE) { emailsource = TRUE; method = ISOCHARSET; } else if (bool_val == FALSE) emailsource = FALSE; } if (!strcmp ("footer", variable)) { if (footer_html) free (footer_html); footer_html = NULL; if (value[0]) footer_html = (char*) strdup (value); } if (!strcmp ("separator", variable)) { if (separator_html) free (separator_html); separator_html = NULL; if (value[0]) separator_html = (char*) strdup (value); } if (!strcmp ("bodybgcolor", variable)) { if (bodybgcolor) free (bodybgcolor); bodybgcolor = NULL; if (value[0]) bodybgcolor = (char*) strdup (value); } if (!strcmp ("bodytext", variable)) { if (bodytext) free (bodytext); bodytext = NULL; if (value[0]) bodytext = (char*) strdup (value); } if (!strcmp ("bodylink", variable)) { if (bodylink) free (bodylink); bodylink = NULL; if (value[0]) bodylink = (char*) strdup (value); } } } /* * Search for a2htmlrc file in * 1) current working directory * 2) HOME (env variable) * Looks first after a2htmlrc, then .a2htmlrc * Opens the file and returns filepointer */ FILE * fopen_rc_file (const char *rc_name, const char *mode) { char *cwd_path = getcwd ( NULL, 0); /* string is auto. allocated */ char *home_path = getenv ("HOME"); /* string not allocated (just a borrow pointer) */ char *slash_cwd = "/"; char *slash_home = "/"; char *a2htmlrc_file = NULL; FILE *fp_rc; int p_size = 0; if (cwd_path) p_size = (int) strlen (cwd_path); if (home_path) /* Well, just in case it doesn't exist! */ p_size = max (p_size, (int) strlen (home_path)); if (rc_name) p_size += (int) strlen (rc_name); a2htmlrc_file = (char*) malloc (p_size + 3); if (rc_name && a2htmlrc_file && (cwd_path || home_path)) { /* first look for a2htmlrc in current working directory */ if (cwd_path) { /* determine slash type */ #if defined(ATARI) && !defined(__MINT__) /* Using \ in path ?? Not executed in MiNTLib or Linux - could be a BAD THING (TM)*/ if ((cwd_path[1] == ':') || strchr (cwd_path, '\\')) slash_cwd = "\\"; #endif strcpy (a2htmlrc_file, cwd_path); strcat (a2htmlrc_file, slash_cwd); strcat (a2htmlrc_file, rc_name); /* try a2htmlrc */ fp_rc = fopen (a2htmlrc_file, mode); if (!fp_rc) { strcpy (a2htmlrc_file, cwd_path); strcat (a2htmlrc_file, slash_cwd); strcat (a2htmlrc_file, "."); strcat (a2htmlrc_file, rc_name); /* try .a2htmlrc */ fp_rc = fopen (a2htmlrc_file, mode); } } /* then look for it in home directory */ if (!fp_rc && home_path) { /* Hopefully nobody uses \ in fs with / as slash... */ if ((home_path[1] == ':') || strchr (home_path, '\\')) slash_home = "\\"; strcpy (a2htmlrc_file, home_path); strcat (a2htmlrc_file, slash_home); strcat (a2htmlrc_file, rc_name); /* try a2htmlrc in home */ fp_rc = fopen (a2htmlrc_file, mode); if (!fp_rc) { strcpy (a2htmlrc_file, home_path); strcat (a2htmlrc_file, slash_home); strcat (a2htmlrc_file, "."); strcat (a2htmlrc_file, rc_name); /* try .a2htmlrc in home */ fp_rc = fopen (a2htmlrc_file, mode); } } } if (a2htmlrc_file) free (a2htmlrc_file); if (cwd_path) free (cwd_path); return fp_rc; } struct file_entry *create_entry (void) { return (struct file_entry*) calloc (1, sizeof (struct file_entry)); } void free_list (struct file_entry* start) { struct file_entry* next; struct file_entry* current = start; while (current) { next = current->next; if (current->input_filename) free (current->input_filename); if (current->output_filename) free (current->output_filename); if (current->title_html) free (current->title_html); free (current); current = next; } } /* * make ex test.txt into test.html, if new_ext is html * special case: if out_filename is present, no filtype substitution is made. */ void make_output_filename (const char *in_filename, char *out_filename, const char *new_ext) { if (!in_filename || !out_filename || !*in_filename) return; if (!*out_filename) /* inputfile, but no outputfile??? */ { register unsigned char *p = NULL; register int has_ext = FALSE; /* use same name as inputfile */ strcpy (out_filename, in_filename); /* replace extension with html if name has a dot in it */ if (p = strrchr (out_filename, '.')) { /* check that the dot is 'after' / or \ (not a dot in path) */ if ((p > (unsigned char *)strrchr (out_filename, '/')) && (p > (unsigned char *)strrchr (out_filename, '\\')) ) { strcpy (p, new_ext); has_ext = TRUE; } } if (!has_ext) strcat (out_filename, new_ext); /* no ext, let's give it a html extension */ } } void show_help (int all) { #ifdef ATARI output_error_text ( \ "a2html "VERSION ", Compiled "__DATE__".\n" \ "Converts a textfile into an iso-8859-1 (7bit) htmlfile\n" \ "Written by Tommy Andersen (tommya@post3.tele.dk). Public Domain\n" \ ); if (all) output_error_text ( "Usage: a2html [[options] textfile [htmlfile]]\n" \ " Charset options:\n" \ " -iso, -iso-8859-1 Textfile uses iso-8859-1 charset.\n" \ " -atari Textfile uses Atari charset (default).\n" \ " Speciel options:\n" \ " -q, -quiet Quiet mode - no errortext.\n" \ " -p, -pause Wait for a keypress before quit.\n" \ " --help This help.\n" \ " --version Display a2html version.\n" \ " Input options:\n" \ " <textfile> Read from textfile.\n" \ " no <textfile> Read input from stdin.\n" \ " Input type:\n" \ " -emailsource Textfile is an email.\n" \ " Output options:\n" \ " -o <htmlfile>, -out <htmlfile>\n" \ " Write to htmlfile.\n" \ " no <htmlfile> Write to textfile, filetype changed to html.\n" \ " If input is read from stdin, stdout will be used.\n" \ " -, -stdout Force output to stdout.\n" \ " Html options:\n" \ " -crlf Lines with CR + LF.\n" \ " -nodoctype No doctype in htmlfile.\n" \ " -title <string> Use string as title in htmlfile.\n" \ " -bodybgcolor <color>\n" \ " Change background color.\n" \ " -bodytext <color> Change text color.\n" \ " -bodylink <color> Change link color.\n" \ " -notags No <header><title><body> tags in htmlfile.\n" \ " -nopre No <pre> tags in output - <br> are generated.\n"\ " -nobr No <br> if -nopre is used.\n" \ " -footer <string> Use string as footer.\n" \ " -separator <string> Use string as separator (-emailsource).\n" \ " Url options:\n" \ " -nolinks Don't make href in htmlfile.\n" \ " -nomail Don't make mailto in htmlfile.\n" \ ); #else output_error_text ( \ "a2html "VERSION ", Compiled "__DATE__".\n" \ "Converts a textfile into an iso-8859-1 (7bit) htmlfile\n" \ "Written by Tommy Andersen (tommya@post3.tele.dk) Public Domain\n" \ ); if (all) output_error_text ( "Usage: a2html [[options] textfile [htmlfile]]\n" \ " Charset options:\n" \ " -iso, -iso-8859-1 Textfile uses iso-8859-1 charset (default).\n" \ " -atari Textfile uses Atari charset.\n" \ " Speciel options:\n" \ " -q, -quiet Quiet mode - no errortext.\n" \ " -p, -pause Wait for a keypress before quit.\n" \ " --help This help.\n" \ " --version Display a2html version.\n" \ " Input options:\n" \ " <textfile> Read from textfile.\n" \ " no <textfile> Read input from stdin.\n" \ " Input type:\n" \ " -emailsource Textfile is an email.\n" \ " Output options:\n" \ " -o <htmlfile>, -out <htmlfile>\n" \ " Write to htmlfile.\n" \ " no <htmlfile> Write to textfile, filetype changed to html.\n" \ " If input is read from stdin, stdout will be used.\n" \ " -, -stdout Force output to stdout.\n" \ " Html options:\n" \ " -crlf Lines with CR + LF.\n" \ " -nodoctype No doctype in htmlfile.\n" \ " -title <string> Use string as title in htmlfile.\n" \ " -bodybgcolor <color>\n" \ " Change background color.\n" \ " -bodytext <color> Change text color.\n" \ " -bodylink <color> Change link color.\n" \ " -notags No <header><title><body> tags in htmlfile.\n" \ " -nopre No <pre> tags in output - <br> are generated.\n"\ " -nobr No <br> if -nopre is used.\n" \ " -footer <string> Use string as footer.\n" \ " -separator <string> Use string as separator (-emailsource).\n" \ " Url options:\n" \ " -nolinks Don't make href in htmlfile.\n" \ " -nomail Don't make mailto in htmlfile.\n" \ ); #endif } int make_html_file (char *input_filename, char *output_filename, const unsigned char *title) { register int c; unsigned char word[WORD_LENGTH+1]; register FILE *inhandle = NULL; register FILE *outhandle = NULL; /* check for same input file as outputfile */ if (input_filename && output_filename) if (!strcmp (input_filename, output_filename)) { output_error_text ("Error: inputfile same as outputfile!\n"); return 1; } if (input_filename) inhandle = fopen (input_filename, "rb"); else inhandle = stdin; if (inhandle) /* don't create files, if inputfile couldn't be opened */ { if (output_filename) outhandle = fopen (output_filename, "wb"); else outhandle = stdout; } if (inhandle && outhandle) { /* Write Html headers */ if (!notags) { char *some_text = (char*) malloc (1000); if (doctype) output_string (FALSE, header_doctype, outhandle); output_string (FALSE, header_tags1, outhandle); if (title) output_parse_string (FALSE, title, outhandle); else if (input_filename && *input_filename) output_parse_string (FALSE, find_filename (input_filename), outhandle); else output_parse_string (FALSE, "No title", outhandle); output_string (FALSE, header_tags2, outhandle); /* make a body */ if (some_text) { /* bgcolor */ sprintf (some_text, "<body bgcolor=\"%s\"", (bodybgcolor) ? bodybgcolor : "White" ); output_string (FALSE, some_text, outhandle); sprintf (some_text, " text=\"%s\"", (bodytext) ? bodytext : "Black" ); output_string (FALSE, some_text, outhandle); sprintf (some_text, " link=\"%s\"", (bodylink) ? bodylink : "Blue" ); output_string (FALSE, some_text, outhandle); output_string (FALSE, ">\n", outhandle); free (some_text); } else { output_string (FALSE, "<body bgcolor=\"White\" text=\"Black\" link=\"Blue\">\n", outhandle); } } /* Speciel input file */ /* Emails should never use Atari charset, so we assumes iso-8859-1 is used */ /* Quoted-printable and mime encoded emails are _not_ decoded */ /* The complete header is inserted into a comment (outside the <pre> tags ) */ /* Valid fields are 'saved' and later output'd */ if (emailsource) { int email_header_mode = TRUE; int line_must_out = FALSE; unsigned char *emh_line = (unsigned char*) malloc (2000+1); /* many $ mailers breaks the rules */ unsigned char *p; struct emh_text_entry *current, *emh_work_list; current = emh_work_list = (struct emh_text_entry*) calloc (1, sizeof (struct emh_text_entry)); if (emh_line && emh_work_list) { output_string (FALSE, "\n", outhandle); while (!feof (inhandle) && email_header_mode) { emh_line[0] = '\0'; if (fgets (emh_line, 2000, inhandle)) { if (emh_line[0] == '\r' || emh_line[0] == '\n') { email_header_mode = FALSE; /* marker reached */ line_must_out = FALSE; } else if (emh_line[0] > ' ') { int i = 0; line_must_out = FALSE; /* check for a valid field to be saved */ while (!line_must_out && show_emh_field[i] ) { if (!strncasecmp (emh_line, show_emh_field[i], strlen (show_emh_field[i]))) { line_must_out = TRUE; } i++; } } /* Save the field */ if (line_must_out && current) { if (!current->text) current->text = (unsigned char*) strdup (emh_line); if (current->next = (struct emh_text_entry*) calloc (1, sizeof (struct emh_text_entry))) current = current->next; } output_string (FALSE, "\n", outhandle); } /* if fgets */ } /* while */ output_string (FALSE, "\n", outhandle); if (!nopre) output_string (FALSE, pre_start_tag, outhandle); /* output the fields to be displayed */ current = emh_work_list; while (current) { /* lets parse and output fields */ if (current->text) { register unsigned char *p = current->text; while (*p) { c = *p++; if ( c <= ' ' || c == '<' || c == '>' || c == '\"' || c=='(' || c == ';' || c=='\'' || c==')' || c=='/') output_parse_char (TRUE, c, outhandle); else { /* build a word */ int i = 0; word[i++] = (unsigned char) c; while (*p && c > ' ' && c!= '<' && c!= '>' && c!='\"') { c = *p++; if ( c > ' ' && c!= '<' && c!= '>' && c != '\"') if (i < WORD_LENGTH) word[i++] = (unsigned char) c; } word[i] = '\0'; output_word (TRUE, word, outhandle); output_parse_char (TRUE, c, outhandle); } } } current = current->next; } if (!nopre) output_string (FALSE, pre_end_tag, outhandle); if (separator_html) { output_string (FALSE, separator_html, outhandle); output_string (FALSE, "\n", outhandle); } else if (nopre) output_string (FALSE, "<br>\n", outhandle); /* Cleanup the malloc'ed mess */ free (emh_line); current = emh_work_list; while (current) { register struct emh_text_entry *temp; if (current->text) free (current->text); temp = current->next; free (current); current = temp; } } /* if */ } /* End of speciel input file */ /* And the text (body) */ if (!nopre) output_string (FALSE, pre_start_tag, outhandle); word[0] = '\0'; while (!feof (inhandle)) { c = fgetc (inhandle); if ( c >= 0) /* c will be <0 if eof reached */ { if ( c <= ' ' || c == '<' || c == '>' || c == '\"' || c=='(' || c == ';' || c=='\'' || c==')' || c=='/') output_parse_char (TRUE, c, outhandle); /* not the beginning of a word, so just output it */ else { /* build a word */ int i = 0; word[i++] = (unsigned char) c; while (!feof (inhandle) && c > ' ' && c!= '<' && c!= '>' && c!='\"') { c = fgetc (inhandle); if ( c > ' ' && c!= '<' && c!= '>' && c != '\"') if (i < WORD_LENGTH) word[i++] = (unsigned char) c; } word[i] = '\0'; output_word (TRUE, word, outhandle); if (!feof (inhandle)) output_parse_char (TRUE, c, outhandle); } } } if (inhandle != stdin) /* clean close */ fclose (inhandle); /* Now write the end tags */ output_string (FALSE, "\n", outhandle); /* ensure that the end tags are starting on a new line */ if (!nopre) output_string (FALSE, pre_end_tag, outhandle); if (footer_html) { output_string (FALSE, footer_html, outhandle); output_string (FALSE, "\n", outhandle); /* ensures that the next tags are starting on a new line */ } if (!notags) output_string (FALSE, bottom_tags, outhandle); if (outhandle != stdout) fclose (outhandle); /* well, should check the return value here */ } else { char *sometext = (char*) malloc (1000); /* should be enough */ if (sometext) { sprintf (sometext, "Can not open inputfile [%s] or outputfile [%s]\n", (input_filename ? input_filename : "stdin"), (output_filename ? output_filename : "stdout")); output_error_text (sometext); free (sometext); } return 1; } return 0; } /* * Print some errortext to stderr * Errortext will no be printed if in quiet mode */ void output_error_text (char *string) { if (!quiet) fprintf (stderr, string); } /* * Return the address of first letter in the filename * argument string is a path+filename * ex: d:\directory\filename.ext * ^string ^return */ char * find_filename (char *string) { register char *p=string; if (string) { p += strlen (string); while (p>string && *(p-1) !='\\' && *(p-1) !=':' && *(p-1)!='/' ) p--; } return p; } /* * Outputs a char to the html file * No charset parsing * If the char is a LF (ascii 10), then it will finish the line. */ void output_char (int allow_br, int c, FILE *handle) { if (c == 10) { if (nopre && !nobr && allow_br) /* end of line and no pre -> we make the line ends with <br> */ output_string (FALSE, "<br>", handle); /* note! recursive! Never \n in this one!! */ if (crlf) fputc (13, handle); } fputc (c, handle); } /* * Outputs a string to the html file * No charset parsing */ void output_string (int allow_br, const char *s, FILE *handle) { if (s) while (*s) output_char (allow_br, *s++, handle); } /* * Outputs a char to the html file * Char will be charset parsed */ void output_parse_char (int allow_br, unsigned int c, FILE *handle) { if (method == ATARICHARSET) c = convertAtariToIso88591[ c & 0xff ]; output_string (allow_br, convertIso88591ToHtml[ c & 0xff ], handle); } /* * Outputs a string to the html file * String will be charset parsed */ void output_parse_string (int allow_br, const char *s, FILE *handle) { register unsigned int c; if (s) while (*s) { c = (unsigned int) *s++; if (method == ATARICHARSET) c = convertAtariToIso88591[ c & 0xff ]; output_string (allow_br, convertIso88591ToHtml[ c & 0xff], handle); } } /* * check word for any url style in it and make a link. * outputs the word too. * Not 100% fool-proof, but shouldn't miss too many url's. */ void output_word (int allow_br, const unsigned char *word, FILE *handle) { unsigned char *url_checker = NULL; unsigned char *email_checker = NULL; if ( !nolinks && (url_checker = strchr (word, ':'))) { /* examples: (http://localhost/) "ftp://sunsite.auc.dk". */ unsigned char url_host[WORD_LENGTH+1]; int url_ok = FALSE; int url_index = 0; int pre_chars = (int)(url_checker - word); /* check for <url:http://...> type */ /* if so, then move the url_checker pointer to the second : */ if (pre_chars >= strlen ("url") ) if (!strncasecmp (url_checker-(long)strlen ("url"), "url", strlen ("url") )) { unsigned char *p = url_checker; url_checker = strchr (url_checker+1, ':'); /* find next : */ pre_chars = (int) (url_checker - p - 1); /* recalculate it */ } /* look for a valid urltype */ while ( url_types[url_index] && !url_ok && url_checker) /* an url_type, not found and : */ { if (pre_chars == strlen (url_types[url_index]) ) /* enough (exact) chars in word before : */ { if (!strncasecmp (url_checker-(long)strlen (url_types[url_index]), url_types[url_index], strlen (url_types[url_index]) )) { /* found a match */ register unsigned char *p = NULL; int count = (int)(url_checker - word +1); /* chars before the url */ strcpy (url_host, url_checker+1); /* now... remove any " */ if (*url_host) if (p = strchr (url_host, '\"')) *p = '\0'; /* remove any < - these can't be in the word, but the " removing * thing above could dig something nasty out of the url. */ if (*url_host) if (p = strchr (url_host, '<')) *p = '\0'; /* remove any > */ if (*url_host) if (p = strchr (url_host, '>')) *p = '\0'; /* remove trailing ,.;\?! */ if (*url_host) { p = url_host + strlen (url_host) - 1; while ((p >= url_host) && strchr (",.;:!?\\", (int)*p)) /* protect me */ *p-- = '\0'; } /* count () and remove unbalanced ) in url */ if (*url_host) /* still something left? */ { int par_start = 0; int par_end = 0; unsigned char *p2 = NULL; p = url_host; while (p = strchr (p, '(')) { par_start++; p++; } p = url_host; while (p = strchr (p, ')')) { par_end++; p++; } while (par_end-- > par_start) /* remove em */ if (p2 = strrchr (url_host, ')')) *p2 = '\0'; } /* make a small validator */ if (*url_host) { p = url_host; /* skip / */ while (*p == '/') p++; if (*p) { int trailing_chars = (strlen (url_checker+1) - strlen (url_host)) > 0 ? TRUE : FALSE; url_ok = TRUE; /* yup, let's make a link */ /* a little string copy and manipulation */ output_string (allow_br, "<a href=\"", handle); output_string (allow_br, url_types[url_index], handle); output_char (allow_br, ':', handle); output_string (allow_br, url_host, handle); output_string (allow_br, "\">", handle); if (trailing_chars) { int url_length = (int) strlen (url_host); /* find what are going to be hightlighted */ strcpy (url_host, word); /* copy all - using the array */ url_host[count + url_length] = '\0'; /* zero it */ /* highlight stuff */ output_parse_string (allow_br, url_host, handle); /* and what's left are not going to be highlighted */ output_string (allow_br, "</a>", handle); /* let's output that */ output_parse_string (allow_br, &word[count + url_length], handle); } else { output_parse_string (allow_br, word, handle); output_string (allow_br, "</a>", handle); } } } } } url_index++; } if (!url_ok) { /* not a valid url */ output_parse_string (allow_br, word, handle); } } else if (!nomail && (email_checker = strchr (word, '@'))) { /* or perhaps a email address * examples <tommya@post3.tele.dk> (me@here) him@there "user@anywhere" */ int email_ok = FALSE; unsigned char email_address[WORD_LENGTH+1]; register unsigned char *p = (unsigned char *) word; strcpy (email_address, p); p = email_address; if (*p == '@') /* start with a @ ?? */ *p = '\0'; /* surely an illegal emailaddress */ while (*p && !strchr (">\"\'\\);", (int) *p)) /* look for ">');\ that ends the emailaddress */ p++; *p = '\0'; /* found the end of the emailaddress */ /* remove _trailing_ dots, commas, !, ?. Still legal: user@host?subject */ if (*email_address) { p = email_address + strlen (email_address) - 1; while ((p >= email_address) && strchr (",.?!:", (int)*p)) /* protect me */ *p-- = '\0'; } email_checker = strchr (email_address, '@'); /* better check again for @ */ if (email_checker && strlen (email_checker+1) && !strchr (email_checker+1, '@')) /* a host, length and no double @ ?? */ { /* okay, lets make a mailto */ email_ok = TRUE; output_string (allow_br, "<a href=\"mailto:", handle); output_string (allow_br, email_address, handle); output_string (allow_br, "\">", handle); if (strlen (email_address) != strlen (word)) { output_parse_string (allow_br, email_address, handle); output_string (allow_br, "</a>", handle); /* not highlighted stuff */ output_parse_string (allow_br, &word[(int)strlen (email_address)], handle); } else /* no trailing things */ { output_parse_string (allow_br, word, handle); output_string (allow_br, "</a>", handle); } } if (!email_ok) { /* not a valid url */ output_parse_string (allow_br, word, handle); } } else /* nop, just parse the string */ output_parse_string (allow_br, word, handle); }